---
title: "Cleaning Life in Transition Survey"
---

# Load

```{r}
# run the shared helper script (per the original note, this loads the packages used below)
  source(file = "helper-packages.R")

# raw Life in Transition II data: Stata file read with latin1 text encoding
  life_in_trans_2_raw <- read_dta(
    file = "../raw-data/y-multi-life-in-transition-survey/LiTS2/lits2.dta",
    encoding = "latin1"
  )

# raw Life in Transition III data: Stata file read with latin1 text encoding
  life_in_trans_3_raw <- read_dta(
    file = "../raw-data/y-multi-life-in-transition-survey/LiTS3/LiTS III.dta",
    encoding = "latin1"
  )
```

# Clean Life in Transition 1

- NOT PROCESSED:
  - REASONS:
    - No religious intolerance question

# Clean Life in Transition 2: Randomly selected individual from the household was asked the social distance question

```{r}
# declare dates
# Country-level fieldwork window for LiTS II: one row per country with the
# first and last interview date. The table is joined onto the respondent data
# by resp_country_common, so the country names here must match the
# countryname() output used as the join key.
# Dates are written as ISO 8601 (YYYY-MM-DD) and parsed with a numeric-only
# format: month-name formats such as "%b" depend on the session's LC_TIME
# locale and silently return NA under a non-English locale.
  life_in_trans_2_dates <-
    tribble(
      ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
      "Albania", "2010-09-16", "2010-10-22",
      "Armenia", "2010-09-26", "2010-10-23",
      "Azerbaijan", "2010-09-20", "2010-11-18",
      "Belarus", "2010-09-27", "2010-11-16",
      "Bosnia", "2010-09-20", "2010-10-28",
      "Bulgaria", "2010-09-15", "2010-10-26",
      "Croatia", "2010-09-16", "2010-10-29",
      "Czechia", "2010-10-04", "2010-12-01",
      "Estonia", "2010-09-27", "2010-11-06",
      "France", "2010-10-01", "2010-11-15",
      "Georgia", "2010-09-26", "2010-10-25",
      "Germany", "2010-09-11", "2010-10-18",
      "UK", "2010-09-08", "2010-12-15",
      "Hungary", "2010-09-30", "2010-11-10",
      "Italy", "2010-09-30", "2010-11-01",
      "Kazakhstan", "2010-09-24", "2010-10-22",
      "Kosovo", "2010-09-15", "2010-10-20",
      "Kyrgyzstan", "2010-09-24", "2010-10-19",
      "Latvia", "2010-10-01", "2010-11-07",
      "Lithuania", "2010-09-17", "2010-11-17",
      "North Macedonia", "2010-09-11", "2010-10-21",
      "Moldova", "2010-10-10", "2010-11-16",
      "Mongolia", "2010-09-26", "2010-10-29",
      "Montenegro", "2010-09-14", "2010-10-18",
      "Poland", "2010-09-24", "2010-10-25",
      "Romania", "2010-09-29", "2010-11-05",
      "Russia", "2010-09-17", "2010-10-30",
      "Serbia", "2010-09-11", "2010-10-21",
      "Slovakia", "2010-10-05", "2010-12-10",
      "Slovenia", "2010-09-15", "2010-11-19",
      "Sweden", "2010-10-05", "2010-11-23",
      "Tajikistan", "2010-09-28", "2010-11-11",
      "Turkey", "2010-09-17", "2010-10-12",
      "Ukraine", "2010-09-15", "2010-10-17",
      "Uzbekistan", "2010-11-16", "2011-01-25") %>%
    mutate(
      # convert both columns from character to class Date
      resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
      resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# cleaning life in transition 2
# Builds the harmonised respondent-level table for LiTS II: derives every
# resp_* variable from the raw survey columns, attaches the country-level
# fieldwork dates from life_in_trans_2_dates, and keeps only the resp_* columns.
  clean_life_in_trans_2 <-
    life_in_trans_2_raw %>%
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Life in Transition Survey",
        
      # round number (character vector, title case)  
        resp_round = "Round 2",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3PLGeX3",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person", # gn: confirmed in technical document   

      # country (character vector; list of countries as written in original source)
        resp_country_original = country_,

      # country (character vector; resp_country_original standardised by countryname())
      # this column is the key used to join the fieldwork dates below
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # NOTE(review): parsed from as.character(dater1) as two-digit day, month, year ("%d%m%y");
      # values that do not fit that pattern become NA -- confirm how dater1 is stored
        resp_interview_date = as.Date(as.character(dater1), "%d%m%y")) %>% 
      # add resp_interview_start_date / resp_interview_end_date for each country;
      # the left join keeps every respondent, and countries missing from the dates table get NA
        left_join(
          life_in_trans_2_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes -99, -98, -97, -90, -1 and 1 (atheistic/agnostic) are set to NA;
      # codes 4-6 are collapsed to "Christian"; any other unlisted code is NA
        resp_religion = 
          case_when(
            q716 %in% c(-99, -98, -97, -90, -1, 1) ~ NA_character_,
            q716 == 2 ~ "Buddhist",
            q716 == 3 ~ "Jewish",
            q716 %in% c(4:6) ~ "Christian",
            q716 == 7 ~ "Muslim",
            q716 == 8 ~ "Other religion"
          ),      

      # respondent's denomination (character vector; same source item as resp_religion,
      # but the Christian codes 4-6 are kept separate; unlisted codes become NA via .default)
        resp_denomination =
          dplyr::recode(
            as.character(q716),
            "1" = NA_character_, #Atheistic/agnostic
            "2" = "Buddhist",
            "3" = "Jewish",
            "4" = "Orthodox Christian",
            "5" = "Catholic",
            "6" = "Protestant/Other Christian",
            "7" = "Muslim",
            "8" = "Other",
            .default = NA_character_),   
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # here: respondentage converted to character, with -1 set to NA
        resp_age = 
          dplyr::recode(
            as.character(respondentage),
            "-1" = NA_character_),        
      
      # respondent's education level
      # (original code plus label; the harmonised level is given in square brackets)
      # no .default is set, so codes not listed below pass through unchanged as character
        resp_education_original =
          dplyr::recode(
            as.character(q515),
            "1" = "1. No degree/No education [No education]",
            "2" = "2. Primary education [Primary]",
            "3" = "3. Lower secondary education [Primary]",
            "4" = "4. (Upper) secondary education [Primary]",
            "5" = "5. Post-secondary non-tertiary education [Primary]",
            "6" = "6. Bachelor's degree or more [College]",
            "7" = "7. Master's degree or PhD [College]"),       
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # Select_r code 1 -> 0, code 2 -> 1; any other value falls through to NA
        resp_female =
          case_when(
            Select_r == 1 ~ 0,
            Select_r == 2 ~ 1),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # tablec code 2 -> 1, codes 1 and 3 -> 0; any other value falls through to NA
        resp_rural = 
          case_when(
            tablec == 2 ~ 1, # "type of settlement"
            tablec %in% c(1, 3) ~ 0),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: 3.33 [q333j]; QTEXT: On this list are various groups of people. Could you please mention any that you would not like to have as neighbours? Please just read out the letter that applies. People of a different religion.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector)
      # codes -90 and -1 are set to NA; all other codes are kept as character
        resp_soc_dist_1_original = 
          dplyr::recode(
            as.character(q333j),
            "-90" = NA_character_,
            "-1" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # 1 = group mentioned, 0 = not mentioned; every other value is NA
        resp_soc_dist_1_bin_recode = 
          case_when(
            q333j == 1 ~ 1,
            q333j == 0 ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: 3.33 [q333n]; QTEXT: On this list are various groups of people. Could you please mention any that you would not like to have as neighbours? Please just read out the letter that applies. Jewish people.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Jewish; TYPE: Distance, neighbor",
      
      # original response (as character vector)
      # codes -90 and -1 are set to NA; all other codes are kept as character
        resp_soc_dist_2_original = 
           dplyr::recode(
            as.character(q333n),
            "-90" = NA_character_,
            "-1" = NA_character_),

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # 1 = group mentioned, 0 = not mentioned; every other value is NA
        resp_soc_dist_2_bin_recode = 
          case_when(
            q333n == 1 ~ 1,
            q333n == 0 ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: 3.02 [q302]; QTEXT: Generally speaking, would you say that most people can be trusted, or that you can't be too careful in dealing with people?; ROPTIONS: 1 = Complete distrust [=1] + 2 = Some distrust [=1] + 3 = Neither trust nor distrust [=0] + 4 = Some trust [=0] + 5 = Complete trust [=0]",

      # original response (as character vector)
      # code 9 is set to NA; all other codes are kept as character
        resp_gentrust_original = 
          dplyr::recode(
            as.character(q302),
            "9" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # codes 1-2 (distrust) -> 1, codes 3-5 -> 0; every other value is NA
        resp_gentrust_bin_recode = 
          case_when(
            q302 %in% c(3:5) ~ 0,
            q302 %in% c(1:2) ~ 1,
            TRUE ~ NA_real_),
    
    ################### 
    ### RELIGIOSITY ###  
    ###################
    
      # original question number; question text; response options
        resp_religiosity_qinfo = "NUM: 7.13 [q713a]; QTEXT: Here is a list of voluntary organizations. For each one, please indicate, whether you are an active member, an inactive member, or not a member of that type of organization. Church and religious organizations.; ROPTIONS: 1 = Active Member + 2 = Passive Member + 3 = Not a Member",
    
      # original response (numeric; codes -90 and -1 are set to NA)
        resp_religiosity_original = 
          dplyr::recode(
            as.numeric(q713a),
            `-90` = NA_real_,
            `-1` = NA_real_),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # active member -> 1, passive member -> 0.5, not a member -> 0; anything else is NA
        resp_religiosity_recode = 
          case_when(
            resp_religiosity_original == 1 ~ 1,
            resp_religiosity_original == 2 ~ 0.5,
            resp_religiosity_original == 3 ~ 0)
    
    ) %>% 
    # drop the raw survey columns; only the harmonised resp_* variables are kept
    select(starts_with("resp_"))
```

# Clean Life in Transition 3: Head of the household was asked the social distance question

```{r}
# declare dates
# Country-level fieldwork window for LiTS III: one row per country with the
# first and last interview date. The table is joined onto the respondent data
# by resp_country_common, so the country names here must match the
# countryname() output used as the join key.
# Dates are written as ISO 8601 (YYYY-MM-DD) and parsed with a numeric-only
# format: month-name formats such as "%b" depend on the session's LC_TIME
# locale and silently return NA under a non-English locale (and previously
# had to cope with a mix of abbreviated and full month names).
  life_in_trans_3_dates <-
      tribble(
        ~resp_country_common, ~resp_interview_start_date, ~resp_interview_end_date,
        "Albania", "2016-01-30", "2016-03-02",
        "Armenia", "2015-12-28", "2016-05-02",
        "Azerbaijan", "2016-04-15", "2016-06-08",
        "Belarus", "2016-04-02", "2016-05-22",
        "Bosnia", "2015-12-16", "2016-02-10",
        "Bulgaria", "2015-12-11", "2016-02-01",
        "Croatia", "2015-12-31", "2016-02-21", # source gives official start of fieldwork as 31/12/16; entered as 31/12/15 so that start date < end date
        "Cyprus", "2015-12-05", "2016-02-14",
        "Czechia", "2016-01-11", "2016-05-05",
        "Estonia", "2015-12-19", "2016-02-15",
        "Georgia", "2016-02-20", "2016-05-30",
        "Germany", "2016-01-06", "2016-02-23",
        "Greece", "2015-11-18", "2016-02-18",
        "Hungary", "2016-02-01", "2016-05-08",
        "Italy", "2016-01-13", "2016-03-01",
        "Kazakhstan", "2016-03-01", "2016-05-19",
        "Kosovo", "2015-12-12", "2016-03-02", # start date taken from the Serbian sample, end date from the Albanian sample
        "Kyrgyzstan", "2016-04-01", "2016-05-23",
        "Latvia", "2015-12-19", "2016-03-14", # source gives official start of fieldwork as 19/12/16; entered as 19/12/15 so that start date < end date
        "Lithuania", "2015-12-04", "2016-01-29",
        "North Macedonia", "2015-12-06", "2016-03-01",
        "Moldova", "2016-03-13", "2016-05-26",
        "Mongolia", "2015-11-25", "2015-12-24",
        "Montenegro", "2015-11-28", "2016-03-07",
        "Poland", "2016-01-11", "2016-02-27",
        "Romania", "2016-02-02", "2016-04-01",
        "Russia", "2016-01-21", "2016-04-21",
        "Serbia", "2015-11-26", "2016-02-25",
        "Slovakia", "2015-12-06", "2016-05-01",
        "Slovenia", "2015-12-09", "2016-05-23",
        "Tajikistan", "2016-01-16", "2016-02-15",
        "Turkey", "2016-01-09", "2016-02-27",
        "Ukraine", "2016-02-24", "2016-05-26",
        "Uzbekistan", "2016-07-02", "2016-07-20") %>%
      mutate(
        # convert both columns from character to class Date
        resp_interview_start_date = as.Date(resp_interview_start_date, format = "%Y-%m-%d"),
        resp_interview_end_date = as.Date(resp_interview_end_date, format = "%Y-%m-%d"))

# cleaning life in transition 3
# Builds the harmonised respondent-level table for LiTS III: derives every
# resp_* variable from the raw survey columns, attaches the country-level
# fieldwork dates from life_in_trans_3_dates, and keeps only the resp_* columns.
  clean_life_in_trans_3 <-
    life_in_trans_3_raw %>%
    mutate(
      
    #########################  
    ####### META-DATA #######  
    #########################      
      
      # source name (character vector, title case)
        resp_source = "Life in Transition Survey",
        
      # round number (character vector, title case)  
        resp_round = "Round 3",
      
      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3NEyzbh",

      # survey mode (in-person/phone/internet)
        resp_survey_mode = "in-person",    

      # country (character vector; list of countries as written in original source)
        resp_country_original = country,

      # country (character vector; resp_country_original standardised by countryname())
      # this column is the key used to join the fieldwork dates below
        resp_country_common = 
          countryname(resp_country_original),
        
      # interview date (variable of class Date; if only month given, input 1st of month)
      # NOTE(review): parsed from as.character(STIME) as four-digit year, month, day ("%Y%m%d");
      # values that do not fit that pattern become NA -- confirm how STIME is stored
        resp_interview_date = as.Date(as.character(STIME), "%Y%m%d")) %>% 
      # add resp_interview_start_date / resp_interview_end_date for each country;
      # the left join keeps every respondent, and countries missing from the dates table get NA
        left_join(
          life_in_trans_3_dates, by = "resp_country_common") %>% 
        mutate(
   
    #########################  
    ##### DEMOGRAPHICS ######  
    #########################
      
      # respondent's religion (character vector that corresponds to master list)
      # codes -99 and 1 (atheistic/agnostic) are set to NA;
      # codes 4-6 are collapsed to "Christian"; any other unlisted code is NA
        resp_religion = 
          case_when(
            q922 %in% c(-99, 1) ~ NA_character_,
            q922 == 2 ~ "Buddhist",
            q922 == 3 ~ "Jewish",
            q922 %in% c(4:6) ~ "Christian",
            q922 == 7 ~ "Muslim",
            q922 == 8 ~ "Other religion"
          ),      

      # respondent's denomination (character vector; same source item as resp_religion,
      # but the Christian codes 4-6 are kept separate; unlisted codes become NA via .default)
        resp_denomination =
          dplyr::recode(
            as.character(q922),
            "1" = NA_character_, #Atheistic/agnostic
            "2" = "Buddhist",
            "3" = "Jewish",
            "4" = "Orthodox Christian",
            "5" = "Catholic",
            "6" = "Other Christian, including Protestant",
            "7" = "Muslim",
            "8" = "Other",
            .default = NA_character_),   
    
      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # here: age_pr converted to character as-is (no missing-value codes are recoded)
        resp_age = as.character(age_pr),        
      
      # respondent's education level
      # (original code plus label; the harmonised level is given in square brackets)
      # no .default is set, so codes not listed below pass through unchanged as character
        resp_education_original =
          dplyr::recode(
            as.character(q109_1),
            "1" = "1. No degree/No education [No education]",
            "2" = "2. Primary education [Primary]",
            "3" = "3. Lower secondary education [Primary]",
            "4" = "4. (Upper) secondary education [Primary]",
            "5" = "5. Post-secondary non-tertiary education [Primary]",
            "6" = "6. Tertiary education (not a university diploma) [Primary]",
            "7" = "7. Bachelor's degree or more [College]",
            "8" = "8. Master's degree or PhD [College]"),        
      
      # respondent's gender (numeric: female = 1; male = 0; other = NA)
      # gender_pr code 1 -> 0, code 2 -> 1; any other value falls through to NA
        resp_female =
          case_when(
            gender_pr == 1 ~ 0,
            gender_pr == 2 ~ 1),
      
      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # urban code 1 -> 0, code 2 -> 1; any other value falls through to NA
        resp_rural =
          case_when(
            urban == 1 ~ 0,
            urban == 2 ~ 1),
      
    #########################  
    ### SOCIAL DISTANCE 1 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: 4.29 [q429j]; QTEXT: On this list are various groups of people. Could you please mention any that you would not like to have as neighbours? Please just read out the letter that applies. People of a different religion.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Different religion; TYPE: Distance, neighbor",
      
      # original response (as character vector; every code is kept as-is)
        resp_soc_dist_1_original = as.character(q429j),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # 1 = group mentioned, 0 = not mentioned; every other value is NA
        resp_soc_dist_1_bin_recode = 
          case_when(
            q429j == 1 ~ 1,
            q429j == 0 ~ 0,
            TRUE ~ NA_real_),
    
    #########################  
    ### SOCIAL DISTANCE 2 ###  
    #########################
    
      # original question number; question text; response options (input above)
        resp_soc_dist_2_qinfo = "NUM: 4.29 [q429n]; QTEXT: On this list are various groups of people. Could you please mention any that you would not like to have as neighbours? Please just read out the letter that applies. Jewish people.; ROPTIONS: 0 = Not mentioned [=0] + 1 = Mentioned [=1]; TARGET: Jewish; TYPE: Distance, neighbor",
      
      # original response (as character vector; every code is kept as-is)
        resp_soc_dist_2_original = as.character(q429n),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # 1 = group mentioned, 0 = not mentioned; every other value is NA
        resp_soc_dist_2_bin_recode = 
          case_when(
            q429n == 1 ~ 1,
            q429n == 0 ~ 0,
            TRUE ~ NA_real_),
    
    ############################  
    ### GENERAL SOCIAL TRUST ###  
    ############################
    
      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: 4.03 [q403]; QTEXT: Generally speaking, would you say that most people can be trusted, or that you can't be too careful in dealing with people?; ROPTIONS: 1 = Complete distrust [=1] + 2 = Some distrust [=1] + 3 = Neither trust nor distrust [=0] + 4 = Some trust [=0] + 5 = Complete trust [=0]",

      # original response (as character vector)
      # code -91 is set to NA; all other codes are kept as character
        resp_gentrust_original = 
          dplyr::recode(
            as.character(q403),
            "-91" = NA_character_),       

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
      # codes 1-2 (distrust) -> 1, codes 3-5 -> 0; every other value is NA
        resp_gentrust_bin_recode = 
          case_when(
            q403 %in% c(3:5) ~ 0,
            q403 %in% c(1:2) ~ 1,
            TRUE ~ NA_real_),    
    
    ###################
    ### RELIGIOSITY ###  
    ###################
    
      # original question number; question text; response options
        resp_religiosity_qinfo = "NUM: 9.19a [q919a]; QTEXT: Here is a list of voluntary organizations. For each one, please indicate, whether you are an active member, an inactive member, or not a member of that type of organization. Church and religious organizations.; ROPTIONS: 1 = Active Member + 2 = Passive Member + 3 = Not a Member",
    
      # original response (numeric; every code is kept as-is)
        resp_religiosity_original = as.numeric(q919a),       

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # active member -> 1, passive member -> 0.5, not a member -> 0; anything else is NA
        resp_religiosity_recode = 
          case_when(
            resp_religiosity_original == 1 ~ 1,
            resp_religiosity_original == 2 ~ 0.5,
            resp_religiosity_original == 3 ~ 0
          )
    
    ) %>% 
    # drop the raw survey columns; only the harmonised resp_* variables are kept
    select(starts_with("resp_"))
```

# Stack data

```{r}
# append the cleaned round 3 rows below the cleaned round 2 rows
# (bind_rows() lines the columns up by name)
  stacked <- bind_rows(clean_life_in_trans_2, clean_life_in_trans_3)
```

# Save data

```{r}
# write the stacked respondent-level table to the cleaned-data folder as an RDS file
  saveRDS(
    object = stacked,
    file = "../cleaned-data/y-12-multi-life-in-transition-survey.rds"
  )
```
